home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
The Atari Compendium
/
The Atari Compendium (Toad Computers) (1994).iso
/
files
/
compress
/
unix
/
zip19p1.zoo
/
deflate.c
< prev
next >
Wrap
C/C++ Source or Header
|
1992-08-25
|
26KB
|
691 lines
/*
Copyright (C) 1990-1992 Mark Adler, Richard B. Wales, Jean-loup Gailly,
Kai Uwe Rommel and Igor Mandrichenko.
Permission is granted to any individual or institution to use, copy, or
redistribute this software so long as all of the original files are included
unmodified, that it is not sold for profit, and that this copyright notice
is retained.
*/
/*
* deflate.c by Jean-loup Gailly.
*
* PURPOSE
*
* Identify new text as repetitions of old text within a fixed-
* length sliding window trailing behind the new text.
*
* DISCUSSION
*
* The "deflation" process depends on being able to identify portions
* of the input text which are identical to earlier input (within a
* sliding window trailing behind the input currently being processed).
*
* The most straightforward technique turns out to be the fastest for
* most input files: try all possible matches and select the longest.
* The key feature is of this algorithm is that insertion and deletions
* from the string dictionary are very simple and thus fast. Insertions
* and deletions are performed at each input character, whereas string
* matches are performed only when the previous match ends. So it is
* preferable to spend more time in matches to allow very fast string
* insertions and deletions. The matching algorithm for small strings
* is inspired from that of Rabin & Karp. A brute force approach is
* used to find longer strings when a small match has been found.
* A similar algorithm is used in comic (by Jan-Mark Wams) and freeze
* (by Leonid Broukhis).
* A previous version of this file used a more sophisticated algorithm
* (by Fiala and Greene) which is guaranteed to run in linear amortized
* time, but has a larger average cost and uses more memory. However
* the F&G algorithm may be faster for some highly redundant files if
* the parameter max_chain_length (described below) is too large.
*
* ACKNOWLEDGEMENTS
*
* The idea of lazy evaluation of matches is due to Jan-Mark Wams, and
* I found it in 'freeze' written by Leonid Broukhis.
* Thanks to many info-zippers for bug reports and testing.
*
* REFERENCES
*
* APPNOTE.TXT documentation file in PKZIP 2.0 distribution.
*
* A description of the Rabin and Karp algorithm is given in the book
* "Algorithms" by R. Sedgewick, Addison-Wesley, p252.
*
* Fiala,E.R., and Greene,D.H.
* Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595
*
* INTERFACE
*
* void lm_init (int pack_level, ush *flags)
* Initialize the "longest match" routines for a new file
*
* ulg deflate (void)
* Processes a new input file and return its compressed length. Sets
* the compressed length, crc, deflate flags and internal file
* attributes.
*/
#include "zip.h"
/* ===========================================================================
* Configuration parameters
*/
/* Compile with MEDIUM_MEM to reduce the memory requirements or
* with SMALL_MEM to use as little memory as possible.
* Warning: defining these symbols affects MATCH_BUFSIZE and HASH_BITS
* (see below) and thus affects the compression ratio. The compressed output
* is still correct, and might even be smaller in some cases.
*/
#ifdef SMALL_MEM
# define HASH_BITS 13 /* Number of bits used to hash strings */
#else
#ifdef MEDIUM_MEM
# define HASH_BITS 14
#else
# define HASH_BITS 15
/* For portability to 16 bit machines, do not use values above 15. */
#endif
#endif
#define HASH_SIZE (unsigned)(1<<HASH_BITS)
#define HASH_MASK (HASH_SIZE-1)
#define WMASK (WSIZE-1)
/* HASH_SIZE and WSIZE must be powers of two */
#define NIL 0
/* Tail of hash chains */
#define FAST 4
#define SLOW 2
/* speed options for the general purpose bit flag */
#ifndef TOO_FAR
# define TOO_FAR 4096
#endif
/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
/* ===========================================================================
* Local data used by the "longest match" routines.
*/
typedef ush Pos;
typedef unsigned IPos;
/* A Pos is an index in the character window. We use short instead of int to
* save space in the various tables. IPos is used only for parameter passing.
*/
#ifndef DYN_ALLOC
uch far window[2L*WSIZE];
/* Sliding window. Input bytes are read into the second half of the window,
* and move to the first half later to keep a dictionary of at least WSIZE
* bytes. With this organization, matches are limited to a distance of
* WSIZE-MAX_MATCH bytes, but this ensures that IO is always
* performed with a length multiple of the block size. Also, it limits
* the window size to 64K, which is quite useful on MSDOS.
* To do: limit the window size to WSIZE+BSZ if SMALL_MEM (the code would
* be less efficient since the data would have to be copied WSIZE/BSZ times)
*/
Pos far prev[WSIZE];
/* Link to older string with same hash index. To limit the size of this
* array to 64K, this link is maintained only for the last 32K strings.
* An index in this array is thus a window index modulo 32K.
*/
Pos far head[HASH_SIZE];
/* Heads of the hash chains or NIL */
#else
uch far * near window = NULL;
Pos far * near prev = NULL;
Pos far * near head;
#endif
long block_start;
/* window position at the beginning of the current output block. Gets
* negative when the window is moved backwards.
*/
local unsigned near ins_h; /* hash index of string to be inserted */
#define H_SHIFT ((HASH_BITS+MIN_MATCH-1)/MIN_MATCH)
/* Number of bits by which ins_h and del_h must be shifted at each
* input step. It must be such that after MIN_MATCH steps, the oldest
* byte no longer takes part in the hash key, that is:
* H_SHIFT * MIN_MATCH >= HASH_BITS
*/
unsigned int near prev_length;
/* Length of the best match at previous step. Matches not greater than this
* are discarded. This is used in the lazy match evaluation.
*/
unsigned near strstart; /* start of string to insert */
unsigned near match_start; /* start of matching string */
local int near eofile; /* flag set at end of input file */
local unsigned near lookahead; /* number of valid bytes ahead in window */
unsigned near max_chain_length;
/* To speed up deflation, hash chains are never searched beyond this length.
* A higher limit improves compression ratio but degrades the speed.
*/
local unsigned int max_lazy_match;
/* Attempt to find a better match only when the current match is strictly
* smaller than this value.
*/
int near good_match;
/* Use a faster search when the previous match is longer than this */
/* Values for max_lazy_match, good_match and max_chain_length, depending on
* the desired pack level (0..9). The values given below have been tuned to
* exclude worst case performance for pathological files. Better values may be
* found for specific files.
*/
typedef struct config {
int good_length;
int max_lazy;
unsigned max_chain;
uch flag;
} config;
local config configuration_table[10] = {
/* good lazy chain flag */
/* 0 */ {0, 0, 0, 0}, /* store only */
/* 1 */ {4, 4, 16, FAST}, /* maximum speed */
/* 2 */ {6, 8, 16, 0},
/* 3 */ {8, 16, 32, 0},
/* 4 */ {8, 32, 64, 0},
/* 5 */ {8, 64, 128, 0},
/* 6 */ {8, 128, 256, 0},
/* 7 */ {8, 128, 512, 0},
/* 8 */ {32, 258, 1024, 0},
/* 9 */ {32, 258, 4096, SLOW}}; /* maximum compression */
/* Note: the current code requires max_lazy >= MIN_MATCH and max_chain >= 4
* but these restrictions can easily be removed at a small cost.
*/
#define EQUAL 0
/* result of memcmp for equal strings */
/* ===========================================================================
* Prototypes for local functions. Use asm version by default for
* MSDOS but not Unix. However the asm version version is recommended
* for 386 Unix.
*/
#ifdef ATARI_ST
# undef MSDOS /* avoid the processor specific parts */
#endif
#if defined(MSDOS) && !defined